library(tidyverse)
library(janitor)
library(naniar)
library(ggmap)
library(shiny)
library("tidyverse")
library("naniar")
library("janitor")
library("stringr")
library(tidyverse)
library(janitor)
library(naniar)
library(tidyverse)
library(skimr)
library(janitor)
library(palmerpenguins)
library(gtools)
library(RColorBrewer)
library(paletteer)
library(ggthemes)
options(scipen = 999)
locations <- read.csv("Health_Facility_General_Information.csv") %>%
clean_names()
cardiac <- read_csv("cardiac-surgery.csv") %>%
clean_names() %>%
filter(facility_id!=0)
## Rows: 1609 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Hospital Name, Detailed Region, Region, Procedure, Year of Hospital...
## dbl (8): Facility ID, Number of Cases, Number of Deaths, Observed Mortality ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#COMPARING NUMBER OF TOTAL PROCEDURES WITH AVERAGE MORTALITY RATE Per procedure
total_cases <- cardiac %>%
group_by(facility_id, procedure) %>%
summarize(total_procedures = sum(number_of_cases), avg_mortality = mean(observed_mortality_rate)) %>%
ggplot(aes(x = total_procedures, y = avg_mortality))+
geom_point(color = "olivedrab") +
labs(title = "Average Mortality Rate compared with Procedure Numbers ",
x = "Total Number of Procedures",
y = "Mortality Rate")+
theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 5))+theme_classic()+facet_wrap(~procedure)+scale_x_log10()+geom_smooth(method=lm, se=F)
## `summarise()` has grouped output by 'facility_id'. You can override using the
## `.groups` argument.
total_cases
## `geom_smooth()` using formula = 'y ~ x'
#COMPARING NUMBER OF TOTAL PROCEDURES WITH AVERAGE MORTALITY RATE
cardiac %>%
group_by(facility_id, procedure) %>%
summarize(total_procedures = sum(number_of_cases), avg_mortality = mean(observed_mortality_rate)) %>%
ggplot(aes(x = total_procedures, y = avg_mortality))+
geom_point(color = "olivedrab") +
labs(title = "Average Mortality Rate compared with Procedure Numbers ",
x = "Total Number of Procedures",
y = "Mortality Rate")+
theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 5))+theme_classic()+scale_x_log10()+geom_smooth(method=lm, se=F)
## `summarise()` has grouped output by 'facility_id'. You can override using the
## `.groups` argument.
## `geom_smooth()` using formula = 'y ~ x'
locations <- locations %>%
group_by(facility_id) %>%
summarise(across(everything(), first))
cardiac <- left_join(cardiac, locations, by="facility_id")
Map:
#Overall deaths for all years:
lat <- c(40.58, 44.70) #we got tehse values from summary above
long <- c(-78.87, -72.98)
bbox <- make_bbox(long, lat, f = 0.03)
maptotal <- get_stadiamap(bbox, maptype = "stamen_terrain", zoom=7) #Make ur basemap, there are other styles which you could find from this link, https://docs.stadiamaps.com/themes/
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.
x <- cardiac %>%
arrange(hospital_name) %>%
group_by(facility_id, hospital_name, facility_latitude, facility_longitude) %>%
summarise(number_of_cases=sum(number_of_cases), average_moratality_rate=mean(observed_mortality_rate), .groups = 'keep') %>%
mutate(total_deaths=average_moratality_rate/100*number_of_cases)
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude)) + labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
register_stadiamaps("e2de824a-0995-4a51-915c-33c14c061e7b", write = FALSE)
cardiac %>% #GET MAX LAT AND LON FROM CODE BELOW!!! EASY MAKES SENSE
select(facility_longitude, facility_latitude) %>%
summary()
## facility_longitude facility_latitude
## Min. :-78.87 Min. :40.58
## 1st Qu.:-75.27 1st Qu.:40.74
## Median :-73.95 Median :40.88
## Mean :-74.65 Mean :41.61
## 3rd Qu.:-73.80 3rd Qu.:42.73
## Max. :-72.98 Max. :44.70
## NA's :65 NA's :65
#For number of cases
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = number_of_cases)) + #WE just add the chart in without using ggplot!
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
#For moratlliry rate
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = average_moratality_rate)) + #WE just add the chart in without using ggplot!
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
#For total deaths
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = total_deaths)) + #WE just add the chart in without using ggplot!
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
#Zoom NYC:
lat1 <- c(40.58, 41.2) #we got tehse values from summary above
long1 <- c(-74.5, -72.98)
bbox1 <- make_bbox(long1, lat1, f = 0.03)
mapnyc <- get_stadiamap(bbox1, maptype = "stamen_terrain", zoom=8)
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.
#NYC Rate of mortaility
ggmap(mapnyc) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = average_moratality_rate)) +
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
#NYC Total Cases
ggmap(mapnyc) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = number_of_cases)) +
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
#NYC total deaths
ggmap(mapnyc) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = total_deaths)) +
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
cardiac %>%
group_by(facility_id) %>%
summarise(mean_observed_mortality=mean(observed_mortality_rate))
## # A tibble: 67 × 2
## facility_id mean_observed_mortality
## <dbl> <dbl>
## 1 1 2.47
## 2 5 2.08
## 3 58 2.42
## 4 66 0.659
## 5 116 2.35
## 6 135 2.34
## 7 181 1.49
## 8 207 2.10
## 9 210 1.42
## 10 213 1.98
## # ℹ 57 more rows
cardiac %>%
group_by(facility_id) %>%
summarise(mean_observed_mortality=mean(observed_mortality_rate)) %>%
summarise(std=sd(mean_observed_mortality), mean_observed_mortality=mean(mean_observed_mortality), minmumum=min(mean_observed_mortality), maximum=max(mean_observed_mortality)) #Idk why this did not work.
## # A tibble: 1 × 4
## std mean_observed_mortality minmumum maximum
## <dbl> <dbl> <dbl> <dbl>
## 1 1.20 2.05 2.05 2.05
x %>%
ggplot(aes(x=average_moratality_rate, y=total_deaths))+geom_point()
x %>%
ggplot(aes(x=number_of_cases, y=total_deaths))+geom_point()+geom_smooth(method=lm, se=F)
## `geom_smooth()` using formula = 'y ~ x'
cardiac <- cardiac %>%
mutate(across(everything(), tolower))
cardiac <- cardiac %>%
mutate(across(everything(), tolower)) %>%
mutate(across(everything(), ~ str_replace_all((.), " ", "_"))) %>%
mutate(across(everything(), ~ str_replace_all((.), "__", "_"))) %>%
mutate(across(-"year_of_hospital_discharge", ~ str_remove((.), "-"))) %>%
mutate(across(number_of_cases:risk_adjusted_mortality_rate, as.numeric))
Have to separate the data, as some of them are over a range of a couple of years, some of them are year by year
sites_with_ranges <- cardiac %>%
filter(str_detect(year_of_hospital_discharge, "-", negate = FALSE))
sites_per_year <- cardiac %>%
filter(str_detect(year_of_hospital_discharge, "-", negate = TRUE))
What is the procedure with the highest observed mortality rate?
cardiac %>%
group_by(procedure) %>%
summarize(avg_mortality = mean(observed_mortality_rate)) %>%
arrange(-avg_mortality)
## # A tibble: 6 × 2
## procedure avg_mortality
## <chr> <dbl>
## 1 tavr 4.66
## 2 valve_or_valve/cabg 3.80
## 3 emergency_pci 3.12
## 4 cabg 1.52
## 5 all_pci 1.22
## 6 nonemergency_pci 0.720
sites_per_year %>%
group_by(procedure) %>%
summarize(avg_mortality = mean(observed_mortality_rate)) %>%
ggplot(aes(x = reorder(procedure, avg_mortality), y = avg_mortality))+
geom_col(color = "black", fill = "lightblue")+
coord_flip()+
labs(title = "Average Observed Mortality Rate by Procedure",
x = "Procedure",
y = "Mortality Rate")+
theme_light(base_size = 12)
sites_with_ranges %>%
group_by(procedure) %>%
summarize(avg_mortality = mean(observed_mortality_rate)) %>%
ggplot(aes(x = reorder(procedure, avg_mortality), y = avg_mortality))+
geom_col(color = "black", fill = "lightblue")+
coord_flip()+
labs(title = "Average Observed Mortality Rate by Procedure",
x = "Procedure",
y = "Mortality Rate")+
theme_light(base_size = 12)
How has the mortality rate changed for cardiac surgery over time in each region?
#I'll come back to this later if I figure out the year stuff but feel free to take a crack at it :). This is mean observed Rate
sites_per_year %>%
group_by(year_of_hospital_discharge) %>%
summarize(observed_mortality_rate=mean(observed_mortality_rate)) %>%
ggplot(aes(x =year_of_hospital_discharge, y = observed_mortality_rate))+
geom_col(color = "steelblue", na.rm = T) + geom_smooth(method = lm, se= TRUE) +
labs(title = "Regional Cardiac Surgery Morality Rates",
x = "Year of Hospital Discharge",
y = "Mean Observed Mortality Rate")
## `geom_smooth()` using formula = 'y ~ x'
sites_with_ranges %>%
group_by(year_of_hospital_discharge) %>%
summarize(observed_mortality_rate=mean(observed_mortality_rate)) %>%
ggplot(aes(x = observed_mortality_rate, y = year_of_hospital_discharge))+
geom_col(color = "steelblue") + geom_smooth(method = lm, se= TRUE) +
labs(title = "Regional Cardiac Surgery Morality Rates",
x = "Mean Observed Mortality Rate",
y = "Year of Hospital Discharge")
## `geom_smooth()` using formula = 'y ~ x'
How does the expected mortality rate compare to the observed mortality rate?
#I don't know which graph to plot for this one but if you can figure it out that would be great. I was thinking maybe scatter but I don't know for sure. I THINK SCATTER IS A GREAT CHOICE BUT IDk why we need two because they are still the same concept... my opinion is cardiac below:
cardiac %>%
ggplot(aes(x = expected_mortality_rate, y = observed_mortality_rate))+
geom_point(color = "steelblue") + geom_smooth(method = lm, se= TRUE) +
labs(title = "Expected vs Observed Mortality Rate",
x = "Expected Mortality",
y = "Observed Mortality")
## `geom_smooth()` using formula = 'y ~ x'
sites_per_year %>%
ggplot(aes(x = expected_mortality_rate, y = observed_mortality_rate))+
geom_point(color = "steelblue") + geom_smooth(method = lm, se= TRUE) +
labs(title = "Expected vs Observed Mortality Rate",
x = "Expected Mortality",
y = "Observed Mortality")
## `geom_smooth()` using formula = 'y ~ x'
sites_with_ranges %>%
ggplot(aes(x = expected_mortality_rate, y = observed_mortality_rate))+
geom_point(color = "steelblue") + geom_smooth(method = lm, se= TRUE) +
labs(title = "Expected vs Observed Mortality Rate",
x = "Expected Mortality",
y = "Observed Mortality")
## `geom_smooth()` using formula = 'y ~ x'
Which region has the highest mortality rate?
cardiac %>%
group_by(region) %>%
summarize(avg_mortality = mean(observed_mortality_rate)) %>%
arrange(-avg_mortality)
## # A tibble: 7 × 2
## region avg_mortality
## <chr> <dbl>
## 1 western_ny__rochester 2.39
## 2 central_ny 2.30
## 3 ny_metro__nyc 2.19
## 4 capital_district 1.86
## 5 ny_metro__new_rochelle 1.80
## 6 ny_metro__long_island 1.74
## 7 western_ny__buffalo 1.67
cardiac %>%
group_by(region) %>%
summarize(avg_mortality = mean(observed_mortality_rate)) %>%
ggplot(aes(x = reorder(region, avg_mortality), y = avg_mortality))+
geom_col(color = "black", fill = "lightpink")+
coord_flip()+
labs(title = "Average Observed Mortality Rate Per Region",
x = "Region",
y = "Mortality Rate")+
theme_light(base_size = 12)
Don’t mind the code and names for this, I’m testing out some stuff :D
library(shinydashboard)
##
## Attaching package: 'shinydashboard'
## The following object is masked from 'package:graphics':
##
## box
ui <- dashboardPage(skin = "purple",
dashboardHeader(title = "Cardiac Dashboard"),
## Sidebar content
dashboardSidebar(
sidebarMenu(
menuItem("Observed Mortality Rate",
tabName = "dashboard",
icon = icon("dashboard")),
menuItem("Counts",
tabName = "widgets",
icon = icon("th")),
menuItem("Cases",
tabName = "cases",
icon = icon("folder-open")),
menuItem("Maps",
tabName = "map",
icon = icon("earth-americas"))
)
),
## Body content
dashboardBody(
tabItems(
## First tab content
tabItem(tabName = "dashboard",
fluidRow(
box(plotOutput("plot1a", height = 250)), # box is a container for the plot
box(title = "Controls", # box is a container for the controls
selectInput("region",
"Select Region of Interest:",
choices=unique(cardiac$region))
)
),
fluidRow(
box(plotOutput("plot1b", height = 250)),
box(title = "Controls",
selectInput("year",
"Select Year of Interest:",
choices = unique(cardiac$year_of_hospital_discharge)))
)
),
## Second tab item
tabItem(tabName = "widgets",
fluidRow(
box(plotOutput("plot2", height = 250)), # box is a container for the plot
box(title = "Controls", # box is a container for the controls
radioButtons("x",
"Select Fill Variable",
choices=c("region", "procedure"),
selected="region")
)
)
),
## Third tab item
tabItem(tabName = "cases",
fluidRow(
box(plotOutput("plot3a", height = 250)), # box is a container for the plot
box(title = "Controls", # box is a container for the controls
selectInput("a",
"Select Hospital",
choices=c(unique(cardiac$hospital_name))
)
)
),
fluidRow(
box(plotOutput("plot3b", height = 250)), # box is a container for the plot
box(title = "Controls", # box is a container for the controls
selectInput("b",
"Select Hospital",
choices=c(unique(cardiac$hospital_name))
)
)
)
),
tabItem(tabName = "map",
fluidRow(
box(plotOutput("plot4a", height = 250)),
box(title = "Controls",
selectInput("m",
"Select Region",
choices = c(unique(cardiac$region))
)
)
),
fluidRow(
box(plotOutput("plot4b", height = 450))
)
)
)
)
)
server <- function(input, output, session) {
session$onSessionEnded(stopApp)
output$plot1a <- renderPlot({
cardiac %>%
filter(region == input$region) %>%
ggplot(aes(x = observed_mortality_rate))+
geom_density(color = "black", fill = "steelblue", alpha = 0.6)
})
output$plot1b <- renderPlot({
cardiac %>%
filter(year_of_hospital_discharge == input$year) %>%
ggplot(aes(x = observed_mortality_rate))+
geom_density(color = "black", fill = "steelblue", alpha = 0.6)
})
output$plot2 <- renderPlot({
cardiac %>%
ggplot(aes_string(x="region", fill=input$x))+
geom_bar(position="dodge", alpha=0.8, color="black")+
labs(x=NULL, y=NULL, fill="Fill Variable")
})
output$plot3a <- renderPlot({
sites_per_year %>%
filter(hospital_name == input$a) %>%
group_by(year_of_hospital_discharge) %>%
summarize(total_cases_a = sum(number_of_cases)) %>%
ggplot(aes_string(x="year_of_hospital_discharge", y = "total_cases_a"))+
coord_flip()+
geom_col(alpha=0.8, color="black", fill = "steelblue")+
labs(title = "Number of Cases per Year",
x= "Year",
y= "Number of Cases")+
theme_light(base_size = 14)
})
output$plot3b <- renderPlot({
sites_with_ranges %>%
filter(hospital_name == input$b) %>%
group_by(year_of_hospital_discharge) %>%
summarize(total_cases_b = sum(number_of_cases)) %>%
ggplot(aes_string(x="year_of_hospital_discharge", y = "total_cases_b"))+
coord_flip()+
geom_col(alpha=0.8, color="black", fill = "lightblue")+
labs(title = "Number of Cases per Year Range",
x= "Year Range",
y= "Number of Cases")+
theme_light(base_size = 14)
})
output$plot4a <- renderPlot({
ggmap(map2)+
geom_point(data = cardiac,
aes(facility_longitude, facility_latitude),
na.rm = T) #trying to use the select input
})
output$plot4b <- renderPlot({
ggmap(map1) +
geom_point(data = cardiac,
aes(facility_longitude, facility_latitude, size = number_of_cases, color = number_of_cases),
na.rm = T)+
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
})
}
shinyApp(ui, server)